; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -passes=asan -asan-instrumentation-with-call-threshold=0 -S \
; RUN:     | FileCheck %s
; RUN: opt < %s -passes=asan -asan-instrumentation-with-call-threshold=0 -asan-instrument-reads=0 -asan-instrument-writes=0 -S \
; RUN:     | FileCheck %s -check-prefix=DISABLED

; Support ASan instrumentation for llvm.masked.{load,store} with constant masks,
; variable masks, and scalable-vector (vscale) types.

;; Data layout shared by every function in this file: little-endian (e), ELF
;; name mangling (m:e), 64-bit aligned i64, 128-bit aligned f80, native integer
;; widths of 8/16/32/64 bits, and a 128-bit natural stack alignment (S128).
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"

;;;;;;;;;;;;;;;; STORE
;; Fixed-width masked store intrinsics used by the store tests below. Operand
;; order is (value to store, base pointer, i32 alignment, per-lane i1 mask).
declare void @llvm.masked.store.v4f32.p0(<4 x float>, ptr, i32, <4 x i1>) argmemonly nounwind
declare void @llvm.masked.store.v8i32.p0(<8 x i32>, ptr, i32, <8 x i1>) argmemonly nounwind
declare void @llvm.masked.store.v4p0.p0(<4 x ptr>, ptr, i32, <4 x i1>) argmemonly nounwind

;; Constant mask <1,1,1,0>: lanes 0, 1 and 2 each get an __asan_store4 check on
;; their element address before the masked.store; the disabled lane 3 gets none.
;; With read and write instrumentation turned off the function is left as is.
define void @store.v4f32.1110(ptr %p, <4 x float> %arg) sanitize_address {
; CHECK-LABEL: @store.v4f32.1110(
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <4 x float>, ptr [[P:%.*]], i64 0, i64 0
; CHECK-NEXT:    [[TMP2:%.*]] = ptrtoint ptr [[TMP1]] to i64
; CHECK-NEXT:    call void @__asan_store4(i64 [[TMP2]])
; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr <4 x float>, ptr [[P]], i64 0, i64 1
; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[TMP3]] to i64
; CHECK-NEXT:    call void @__asan_store4(i64 [[TMP4]])
; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr <4 x float>, ptr [[P]], i64 0, i64 2
; CHECK-NEXT:    [[TMP6:%.*]] = ptrtoint ptr [[TMP5]] to i64
; CHECK-NEXT:    call void @__asan_store4(i64 [[TMP6]])
; CHECK-NEXT:    tail call void @llvm.masked.store.v4f32.p0(<4 x float> [[ARG:%.*]], ptr [[P]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 false>)
; CHECK-NEXT:    ret void
;
; DISABLED-LABEL: @store.v4f32.1110(
; DISABLED-NEXT:    tail call void @llvm.masked.store.v4f32.p0(<4 x float> [[ARG:%.*]], ptr [[P:%.*]], i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 false>)
; DISABLED-NEXT:    ret void
;
  tail call void @llvm.masked.store.v4f32.p0(<4 x float> %arg, ptr %p, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 false>)
  ret void
}

;; Constant mask with non-contiguous enabled lanes (0, 3, 5 and 6) on an
;; 8-element i32 vector: exactly those four elements get an __asan_store4 check,
;; in ascending lane order, ahead of the masked.store.
define void @store.v8i32.10010110(ptr %p, <8 x i32> %arg) sanitize_address {
; CHECK-LABEL: @store.v8i32.10010110(
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <8 x i32>, ptr [[P:%.*]], i64 0, i64 0
; CHECK-NEXT:    [[TMP2:%.*]] = ptrtoint ptr [[TMP1]] to i64
; CHECK-NEXT:    call void @__asan_store4(i64 [[TMP2]])
; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr <8 x i32>, ptr [[P]], i64 0, i64 3
; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[TMP3]] to i64
; CHECK-NEXT:    call void @__asan_store4(i64 [[TMP4]])
; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr <8 x i32>, ptr [[P]], i64 0, i64 5
; CHECK-NEXT:    [[TMP6:%.*]] = ptrtoint ptr [[TMP5]] to i64
; CHECK-NEXT:    call void @__asan_store4(i64 [[TMP6]])
; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr <8 x i32>, ptr [[P]], i64 0, i64 6
; CHECK-NEXT:    [[TMP8:%.*]] = ptrtoint ptr [[TMP7]] to i64
; CHECK-NEXT:    call void @__asan_store4(i64 [[TMP8]])
; CHECK-NEXT:    tail call void @llvm.masked.store.v8i32.p0(<8 x i32> [[ARG:%.*]], ptr [[P]], i32 8, <8 x i1> <i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 false>)
; CHECK-NEXT:    ret void
;
; DISABLED-LABEL: @store.v8i32.10010110(
; DISABLED-NEXT:    tail call void @llvm.masked.store.v8i32.p0(<8 x i32> [[ARG:%.*]], ptr [[P:%.*]], i32 8, <8 x i1> <i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 false>)
; DISABLED-NEXT:    ret void
;
  tail call void @llvm.masked.store.v8i32.p0(<8 x i32> %arg, ptr %p, i32 8, <8 x i1> <i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 false>)
  ret void
}

;; Constant mask with only the last lane enabled: a single __asan_store8 check
;; on element 3 precedes the masked.store.
;; NOTE(review): despite "v4i64" in the function name, the stored vector is
;; <4 x ptr>; the elements are checked as 8-byte accesses.
define void @store.v4i64.0001(ptr %p, <4 x ptr> %arg) sanitize_address {
; CHECK-LABEL: @store.v4i64.0001(
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <4 x ptr>, ptr [[P:%.*]], i64 0, i64 3
; CHECK-NEXT:    [[TMP2:%.*]] = ptrtoint ptr [[TMP1]] to i64
; CHECK-NEXT:    call void @__asan_store8(i64 [[TMP2]])
; CHECK-NEXT:    tail call void @llvm.masked.store.v4p0.p0(<4 x ptr> [[ARG:%.*]], ptr [[P]], i32 8, <4 x i1> <i1 false, i1 false, i1 false, i1 true>)
; CHECK-NEXT:    ret void
;
; DISABLED-LABEL: @store.v4i64.0001(
; DISABLED-NEXT:    tail call void @llvm.masked.store.v4p0.p0(<4 x ptr> [[ARG:%.*]], ptr [[P:%.*]], i32 8, <4 x i1> <i1 false, i1 false, i1 false, i1 true>)
; DISABLED-NEXT:    ret void
;
  tail call void @llvm.masked.store.v4p0.p0(<4 x ptr> %arg, ptr %p, i32 8, <4 x i1> <i1 false, i1 false, i1 false, i1 true>)
  ret void
}

;; Non-constant mask: the enabled lanes are unknown at compile time, so for each
;; of the four lanes the mask bit is extracted and branched on, and the
;; __asan_store4 check for that element runs only when the bit is set. The
;; masked.store itself follows in the final block, unchanged.
define void @store.v4f32.variable(ptr %p, <4 x float> %arg, <4 x i1> %mask) sanitize_address {
; CHECK-LABEL: @store.v4f32.variable(
; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[MASK:%.*]], i64 0
; CHECK-NEXT:    br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP5:%.*]]
; CHECK:       2:
; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr <4 x float>, ptr [[P:%.*]], i64 0, i64 0
; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[TMP3]] to i64
; CHECK-NEXT:    call void @__asan_store4(i64 [[TMP4]])
; CHECK-NEXT:    br label [[TMP5]]
; CHECK:       5:
; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x i1> [[MASK]], i64 1
; CHECK-NEXT:    br i1 [[TMP6]], label [[TMP7:%.*]], label [[TMP10:%.*]]
; CHECK:       7:
; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr <4 x float>, ptr [[P]], i64 0, i64 1
; CHECK-NEXT:    [[TMP9:%.*]] = ptrtoint ptr [[TMP8]] to i64
; CHECK-NEXT:    call void @__asan_store4(i64 [[TMP9]])
; CHECK-NEXT:    br label [[TMP10]]
; CHECK:       10:
; CHECK-NEXT:    [[TMP11:%.*]] = extractelement <4 x i1> [[MASK]], i64 2
; CHECK-NEXT:    br i1 [[TMP11]], label [[TMP12:%.*]], label [[TMP15:%.*]]
; CHECK:       12:
; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr <4 x float>, ptr [[P]], i64 0, i64 2
; CHECK-NEXT:    [[TMP14:%.*]] = ptrtoint ptr [[TMP13]] to i64
; CHECK-NEXT:    call void @__asan_store4(i64 [[TMP14]])
; CHECK-NEXT:    br label [[TMP15]]
; CHECK:       15:
; CHECK-NEXT:    [[TMP16:%.*]] = extractelement <4 x i1> [[MASK]], i64 3
; CHECK-NEXT:    br i1 [[TMP16]], label [[TMP17:%.*]], label [[TMP20:%.*]]
; CHECK:       17:
; CHECK-NEXT:    [[TMP18:%.*]] = getelementptr <4 x float>, ptr [[P]], i64 0, i64 3
; CHECK-NEXT:    [[TMP19:%.*]] = ptrtoint ptr [[TMP18]] to i64
; CHECK-NEXT:    call void @__asan_store4(i64 [[TMP19]])
; CHECK-NEXT:    br label [[TMP20]]
; CHECK:       20:
; CHECK-NEXT:    tail call void @llvm.masked.store.v4f32.p0(<4 x float> [[ARG:%.*]], ptr [[P]], i32 4, <4 x i1> [[MASK]])
; CHECK-NEXT:    ret void
;
; DISABLED-LABEL: @store.v4f32.variable(
; DISABLED-NEXT:    tail call void @llvm.masked.store.v4f32.p0(<4 x float> [[ARG:%.*]], ptr [[P:%.*]], i32 4, <4 x i1> [[MASK:%.*]])
; DISABLED-NEXT:    ret void
;
  tail call void @llvm.masked.store.v4f32.p0(<4 x float> %arg, ptr %p, i32 4, <4 x i1> %mask)
  ret void
}

;; Store using two masked.stores, which should instrument them both.
;; The combined mask 1010 is split into two single-lane stores to the same
;; pointer (lane 0, then lane 2). Neither store covers the other's lane, so each
;; one must get its own __asan_store4 check immediately before it.
define void @store.v4f32.1010.split(ptr %p, <4 x float> %arg) sanitize_address {
; CHECK-LABEL: @store.v4f32.1010.split(
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <4 x float>, ptr [[P:%.*]], i64 0, i64 0
; CHECK-NEXT:    [[TMP2:%.*]] = ptrtoint ptr [[TMP1]] to i64
; CHECK-NEXT:    call void @__asan_store4(i64 [[TMP2]])
; CHECK-NEXT:    tail call void @llvm.masked.store.v4f32.p0(<4 x float> [[ARG:%.*]], ptr [[P]], i32 4, <4 x i1> <i1 true, i1 false, i1 false, i1 false>)
; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr <4 x float>, ptr [[P]], i64 0, i64 2
; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[TMP3]] to i64
; CHECK-NEXT:    call void @__asan_store4(i64 [[TMP4]])
; CHECK-NEXT:    tail call void @llvm.masked.store.v4f32.p0(<4 x float> [[ARG]], ptr [[P]], i32 4, <4 x i1> <i1 false, i1 false, i1 true, i1 false>)
; CHECK-NEXT:    ret void
;
; DISABLED-LABEL: @store.v4f32.1010.split(
; DISABLED-NEXT:    tail call void @llvm.masked.store.v4f32.p0(<4 x float> [[ARG:%.*]], ptr [[P:%.*]], i32 4, <4 x i1> <i1 true, i1 false, i1 false, i1 false>)
; DISABLED-NEXT:    tail call void @llvm.masked.store.v4f32.p0(<4 x float> [[ARG]], ptr [[P]], i32 4, <4 x i1> <i1 false, i1 false, i1 true, i1 false>)
; DISABLED-NEXT:    ret void
;
  tail call void @llvm.masked.store.v4f32.p0(<4 x float> %arg, ptr %p, i32 4, <4 x i1> <i1 true, i1 false, i1 false, i1 false>)
  tail call void @llvm.masked.store.v4f32.p0(<4 x float> %arg, ptr %p, i32 4, <4 x i1> <i1 false, i1 false, i1 true, i1 false>)
  ret void
}

;; Store using a masked.store after a full store. Shouldn't instrument the second one.
;; The full <4 x float> store to %p is checked once with __asan_store16; the
;; masked.store to the same pointer that follows gets no additional check.
define void @store.v4f32.0010.after.full.store(ptr %p, <4 x float> %arg) sanitize_address {
; CHECK-LABEL: @store.v4f32.0010.after.full.store(
; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint ptr [[P:%.*]] to i64
; CHECK-NEXT:    call void @__asan_store16(i64 [[TMP1]])
; CHECK-NEXT:    store <4 x float> [[ARG:%.*]], ptr [[P]], align 16
; CHECK-NEXT:    tail call void @llvm.masked.store.v4f32.p0(<4 x float> [[ARG]], ptr [[P]], i32 4, <4 x i1> <i1 false, i1 false, i1 true, i1 false>)
; CHECK-NEXT:    ret void
;
; DISABLED-LABEL: @store.v4f32.0010.after.full.store(
; DISABLED-NEXT:    store <4 x float> [[ARG:%.*]], ptr [[P:%.*]], align 16
; DISABLED-NEXT:    tail call void @llvm.masked.store.v4f32.p0(<4 x float> [[ARG]], ptr [[P]], i32 4, <4 x i1> <i1 false, i1 false, i1 true, i1 false>)
; DISABLED-NEXT:    ret void
;
  store <4 x float> %arg, ptr %p
  tail call void @llvm.masked.store.v4f32.p0(<4 x float> %arg, ptr %p, i32 4, <4 x i1> <i1 false, i1 false, i1 true, i1 false>)
  ret void
}

;;;;;;;;;;;;;;;; LOAD
;; Fixed-width masked load intrinsics used by the load tests below. Operand
;; order is (base pointer, i32 alignment, per-lane i1 mask, passthru vector).
declare <4 x float> @llvm.masked.load.v4f32.p0(ptr, i32, <4 x i1>, <4 x float>) argmemonly nounwind
declare <8 x i32> @llvm.masked.load.v8i32.p0(ptr, i32, <8 x i1>, <8 x i32>) argmemonly nounwind
declare <4 x ptr> @llvm.masked.load.v4p0.p0(ptr, i32, <4 x i1>, <4 x ptr>) argmemonly nounwind

;; Constant mask enabling lanes 0, 1, 2 and 7 of an 8-element i32 vector:
;; exactly those four elements get an __asan_load4 check before the
;; masked.load; the disabled lanes 3-6 are not checked.
define <8 x i32> @load.v8i32.11100001(ptr %p, <8 x i32> %arg) sanitize_address {
; CHECK-LABEL: @load.v8i32.11100001(
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <8 x i32>, ptr [[P:%.*]], i64 0, i64 0
; CHECK-NEXT:    [[TMP2:%.*]] = ptrtoint ptr [[TMP1]] to i64
; CHECK-NEXT:    call void @__asan_load4(i64 [[TMP2]])
; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr <8 x i32>, ptr [[P]], i64 0, i64 1
; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[TMP3]] to i64
; CHECK-NEXT:    call void @__asan_load4(i64 [[TMP4]])
; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr <8 x i32>, ptr [[P]], i64 0, i64 2
; CHECK-NEXT:    [[TMP6:%.*]] = ptrtoint ptr [[TMP5]] to i64
; CHECK-NEXT:    call void @__asan_load4(i64 [[TMP6]])
; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr <8 x i32>, ptr [[P]], i64 0, i64 7
; CHECK-NEXT:    [[TMP8:%.*]] = ptrtoint ptr [[TMP7]] to i64
; CHECK-NEXT:    call void @__asan_load4(i64 [[TMP8]])
; CHECK-NEXT:    [[RES:%.*]] = tail call <8 x i32> @llvm.masked.load.v8i32.p0(ptr [[P]], i32 8, <8 x i1> <i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 true>, <8 x i32> [[ARG:%.*]])
; CHECK-NEXT:    ret <8 x i32> [[RES]]
;
; DISABLED-LABEL: @load.v8i32.11100001(
; DISABLED-NEXT:    [[RES:%.*]] = tail call <8 x i32> @llvm.masked.load.v8i32.p0(ptr [[P:%.*]], i32 8, <8 x i1> <i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 true>, <8 x i32> [[ARG:%.*]])
; DISABLED-NEXT:    ret <8 x i32> [[RES]]
;
  %res = tail call <8 x i32> @llvm.masked.load.v8i32.p0(ptr %p, i32 8, <8 x i1> <i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 true>, <8 x i32> %arg)
  ret <8 x i32> %res
}

;; Constant mask <1,0,0,1>: only the first and last elements (lanes 0 and 3)
;; get an __asan_load4 check before the masked.load.
define <4 x float> @load.v4f32.1001(ptr %p, <4 x float> %arg) sanitize_address {
; CHECK-LABEL: @load.v4f32.1001(
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <4 x float>, ptr [[P:%.*]], i64 0, i64 0
; CHECK-NEXT:    [[TMP2:%.*]] = ptrtoint ptr [[TMP1]] to i64
; CHECK-NEXT:    call void @__asan_load4(i64 [[TMP2]])
; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr <4 x float>, ptr [[P]], i64 0, i64 3
; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[TMP3]] to i64
; CHECK-NEXT:    call void @__asan_load4(i64 [[TMP4]])
; CHECK-NEXT:    [[RES:%.*]] = tail call <4 x float> @llvm.masked.load.v4f32.p0(ptr [[P]], i32 4, <4 x i1> <i1 true, i1 false, i1 false, i1 true>, <4 x float> [[ARG:%.*]])
; CHECK-NEXT:    ret <4 x float> [[RES]]
;
; DISABLED-LABEL: @load.v4f32.1001(
; DISABLED-NEXT:    [[RES:%.*]] = tail call <4 x float> @llvm.masked.load.v4f32.p0(ptr [[P:%.*]], i32 4, <4 x i1> <i1 true, i1 false, i1 false, i1 true>, <4 x float> [[ARG:%.*]])
; DISABLED-NEXT:    ret <4 x float> [[RES]]
;
  %res = tail call <4 x float> @llvm.masked.load.v4f32.p0(ptr %p, i32 4, <4 x i1> <i1 true, i1 false, i1 false, i1 true>, <4 x float> %arg)
  ret <4 x float> %res
}

;; Constant mask with only the last lane enabled: a single __asan_load8 check
;; on element 3 precedes the masked.load.
;; NOTE(review): despite "v4i64" in the function name, the loaded vector is
;; <4 x ptr>; the elements are checked as 8-byte accesses.
define <4 x ptr> @load.v4i64.0001(ptr %p, <4 x ptr> %arg) sanitize_address {
; CHECK-LABEL: @load.v4i64.0001(
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <4 x ptr>, ptr [[P:%.*]], i64 0, i64 3
; CHECK-NEXT:    [[TMP2:%.*]] = ptrtoint ptr [[TMP1]] to i64
; CHECK-NEXT:    call void @__asan_load8(i64 [[TMP2]])
; CHECK-NEXT:    [[RES:%.*]] = tail call <4 x ptr> @llvm.masked.load.v4p0.p0(ptr [[P]], i32 8, <4 x i1> <i1 false, i1 false, i1 false, i1 true>, <4 x ptr> [[ARG:%.*]])
; CHECK-NEXT:    ret <4 x ptr> [[RES]]
;
; DISABLED-LABEL: @load.v4i64.0001(
; DISABLED-NEXT:    [[RES:%.*]] = tail call <4 x ptr> @llvm.masked.load.v4p0.p0(ptr [[P:%.*]], i32 8, <4 x i1> <i1 false, i1 false, i1 false, i1 true>, <4 x ptr> [[ARG:%.*]])
; DISABLED-NEXT:    ret <4 x ptr> [[RES]]
;
  %res = tail call <4 x ptr> @llvm.masked.load.v4p0.p0(ptr %p, i32 8, <4 x i1> <i1 false, i1 false, i1 false, i1 true>, <4 x ptr> %arg)
  ret <4 x ptr> %res
}

;; Non-constant mask: the enabled lanes are unknown at compile time, so for each
;; of the four lanes the mask bit is extracted and branched on, and the
;; __asan_load4 check for that element runs only when the bit is set. The
;; masked.load itself follows in the final block, unchanged.
define <4 x float> @load.v4f32.variable(ptr %p, <4 x float> %arg, <4 x i1> %mask) sanitize_address {
; CHECK-LABEL: @load.v4f32.variable(
; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i1> [[MASK:%.*]], i64 0
; CHECK-NEXT:    br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP5:%.*]]
; CHECK:       2:
; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr <4 x float>, ptr [[P:%.*]], i64 0, i64 0
; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[TMP3]] to i64
; CHECK-NEXT:    call void @__asan_load4(i64 [[TMP4]])
; CHECK-NEXT:    br label [[TMP5]]
; CHECK:       5:
; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x i1> [[MASK]], i64 1
; CHECK-NEXT:    br i1 [[TMP6]], label [[TMP7:%.*]], label [[TMP10:%.*]]
; CHECK:       7:
; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr <4 x float>, ptr [[P]], i64 0, i64 1
; CHECK-NEXT:    [[TMP9:%.*]] = ptrtoint ptr [[TMP8]] to i64
; CHECK-NEXT:    call void @__asan_load4(i64 [[TMP9]])
; CHECK-NEXT:    br label [[TMP10]]
; CHECK:       10:
; CHECK-NEXT:    [[TMP11:%.*]] = extractelement <4 x i1> [[MASK]], i64 2
; CHECK-NEXT:    br i1 [[TMP11]], label [[TMP12:%.*]], label [[TMP15:%.*]]
; CHECK:       12:
; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr <4 x float>, ptr [[P]], i64 0, i64 2
; CHECK-NEXT:    [[TMP14:%.*]] = ptrtoint ptr [[TMP13]] to i64
; CHECK-NEXT:    call void @__asan_load4(i64 [[TMP14]])
; CHECK-NEXT:    br label [[TMP15]]
; CHECK:       15:
; CHECK-NEXT:    [[TMP16:%.*]] = extractelement <4 x i1> [[MASK]], i64 3
; CHECK-NEXT:    br i1 [[TMP16]], label [[TMP17:%.*]], label [[TMP20:%.*]]
; CHECK:       17:
; CHECK-NEXT:    [[TMP18:%.*]] = getelementptr <4 x float>, ptr [[P]], i64 0, i64 3
; CHECK-NEXT:    [[TMP19:%.*]] = ptrtoint ptr [[TMP18]] to i64
; CHECK-NEXT:    call void @__asan_load4(i64 [[TMP19]])
; CHECK-NEXT:    br label [[TMP20]]
; CHECK:       20:
; CHECK-NEXT:    [[RES:%.*]] = tail call <4 x float> @llvm.masked.load.v4f32.p0(ptr [[P]], i32 4, <4 x i1> [[MASK]], <4 x float> [[ARG:%.*]])
; CHECK-NEXT:    ret <4 x float> [[RES]]
;
; DISABLED-LABEL: @load.v4f32.variable(
; DISABLED-NEXT:    [[RES:%.*]] = tail call <4 x float> @llvm.masked.load.v4f32.p0(ptr [[P:%.*]], i32 4, <4 x i1> [[MASK:%.*]], <4 x float> [[ARG:%.*]])
; DISABLED-NEXT:    ret <4 x float> [[RES]]
;
  %res = tail call <4 x float> @llvm.masked.load.v4f32.p0(ptr %p, i32 4, <4 x i1> %mask, <4 x float> %arg)
  ret <4 x float> %res
}

;; Load using two masked.loads, which should instrument them both.
;; The combined mask 1001 is split into two single-lane loads from the same
;; pointer (lane 0, then lane 3), with the first result passed through as the
;; second load's passthru. Each load gets its own __asan_load4 check
;; immediately before it.
define <4 x float> @load.v4f32.1001.split(ptr %p, <4 x float> %arg) sanitize_address {
; CHECK-LABEL: @load.v4f32.1001.split(
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <4 x float>, ptr [[P:%.*]], i64 0, i64 0
; CHECK-NEXT:    [[TMP2:%.*]] = ptrtoint ptr [[TMP1]] to i64
; CHECK-NEXT:    call void @__asan_load4(i64 [[TMP2]])
; CHECK-NEXT:    [[RES:%.*]] = tail call <4 x float> @llvm.masked.load.v4f32.p0(ptr [[P]], i32 4, <4 x i1> <i1 true, i1 false, i1 false, i1 false>, <4 x float> [[ARG:%.*]])
; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr <4 x float>, ptr [[P]], i64 0, i64 3
; CHECK-NEXT:    [[TMP4:%.*]] = ptrtoint ptr [[TMP3]] to i64
; CHECK-NEXT:    call void @__asan_load4(i64 [[TMP4]])
; CHECK-NEXT:    [[RES2:%.*]] = tail call <4 x float> @llvm.masked.load.v4f32.p0(ptr [[P]], i32 4, <4 x i1> <i1 false, i1 false, i1 false, i1 true>, <4 x float> [[RES]])
; CHECK-NEXT:    ret <4 x float> [[RES2]]
;
; DISABLED-LABEL: @load.v4f32.1001.split(
; DISABLED-NEXT:    [[RES:%.*]] = tail call <4 x float> @llvm.masked.load.v4f32.p0(ptr [[P:%.*]], i32 4, <4 x i1> <i1 true, i1 false, i1 false, i1 false>, <4 x float> [[ARG:%.*]])
; DISABLED-NEXT:    [[RES2:%.*]] = tail call <4 x float> @llvm.masked.load.v4f32.p0(ptr [[P]], i32 4, <4 x i1> <i1 false, i1 false, i1 false, i1 true>, <4 x float> [[RES]])
; DISABLED-NEXT:    ret <4 x float> [[RES2]]
;
  %res = tail call <4 x float> @llvm.masked.load.v4f32.p0(ptr %p, i32 4, <4 x i1> <i1 true, i1 false, i1 false, i1 false>, <4 x float> %arg)
  %res2 = tail call <4 x float> @llvm.masked.load.v4f32.p0(ptr %p, i32 4, <4 x i1> <i1 false, i1 false, i1 false, i1 true>, <4 x float> %res)
  ret <4 x float> %res2
}

;; Load using a masked.load after a full load. Shouldn't instrument the second one.
;; The full <4 x float> load from %p is checked once with __asan_load16; the
;; masked.load from the same pointer that follows gets no additional check.
;; NOTE(review): the mask used here is <0,0,0,1>, not the "1001" that appears in
;; the function name.
define <4 x float> @load.v4f32.1001.after.full.load(ptr %p, <4 x float> %arg) sanitize_address {
; CHECK-LABEL: @load.v4f32.1001.after.full.load(
; CHECK-NEXT:    [[TMP1:%.*]] = ptrtoint ptr [[P:%.*]] to i64
; CHECK-NEXT:    call void @__asan_load16(i64 [[TMP1]])
; CHECK-NEXT:    [[RES:%.*]] = load <4 x float>, ptr [[P]], align 16
; CHECK-NEXT:    [[RES2:%.*]] = tail call <4 x float> @llvm.masked.load.v4f32.p0(ptr [[P]], i32 4, <4 x i1> <i1 false, i1 false, i1 false, i1 true>, <4 x float> [[ARG:%.*]])
; CHECK-NEXT:    ret <4 x float> [[RES2]]
;
; DISABLED-LABEL: @load.v4f32.1001.after.full.load(
; DISABLED-NEXT:    [[RES:%.*]] = load <4 x float>, ptr [[P:%.*]], align 16
; DISABLED-NEXT:    [[RES2:%.*]] = tail call <4 x float> @llvm.masked.load.v4f32.p0(ptr [[P]], i32 4, <4 x i1> <i1 false, i1 false, i1 false, i1 true>, <4 x float> [[ARG:%.*]])
; DISABLED-NEXT:    ret <4 x float> [[RES2]]
;
  %res = load <4 x float>, ptr %p
  %res2 = tail call <4 x float> @llvm.masked.load.v4f32.p0(ptr %p, i32 4, <4 x i1> <i1 false, i1 false, i1 false, i1 true>, <4 x float> %arg)
  ret <4 x float> %res2
}

;; Scalable vector tests
;; ---------------------------
;; Scalable-vector (<vscale x 4 x float>) variants of the masked load and store
;; intrinsics; operand order matches the fixed-width declarations.
declare <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr, i32, <vscale x 4 x i1>, <vscale x 4 x float>)
declare void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float>, ptr, i32, <vscale x 4 x i1>)

;; Scalable masked load: the element count is only known at run time, so the
;; instrumentation computes it as vscale * 4 and emits a loop over the element
;; index. Each iteration extracts that element's mask bit and runs the
;; __asan_load4 check on the element address only when the bit is set; the
;; masked.load follows once the loop exits.
;; The passthru operand is poison rather than the deprecated undef; it plays no
;; part in the instrumentation.
define <vscale x 4 x float> @scalable.load.nxv4f32(ptr %p, <vscale x 4 x i1> %mask) sanitize_address {
; CHECK-LABEL: @scalable.load.nxv4f32(
; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT:    [[TMP2:%.*]] = mul i64 [[TMP1]], 4
; CHECK-NEXT:    br label [[DOTSPLIT:%.*]]
; CHECK:       .split:
; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[IV_NEXT:%.*]], [[TMP7:%.*]] ]
; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <vscale x 4 x i1> [[MASK:%.*]], i64 [[IV]]
; CHECK-NEXT:    br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP7]]
; CHECK:       4:
; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr <vscale x 4 x float>, ptr [[P:%.*]], i64 0, i64 [[IV]]
; CHECK-NEXT:    [[TMP6:%.*]] = ptrtoint ptr [[TMP5]] to i64
; CHECK-NEXT:    call void @__asan_load4(i64 [[TMP6]])
; CHECK-NEXT:    br label [[TMP7]]
; CHECK:       7:
; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-NEXT:    [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP2]]
; CHECK-NEXT:    br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
; CHECK:       .split.split:
; CHECK-NEXT:    [[RES:%.*]] = tail call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr [[P]], i32 4, <vscale x 4 x i1> [[MASK]], <vscale x 4 x float> poison)
; CHECK-NEXT:    ret <vscale x 4 x float> [[RES]]
;
; DISABLED-LABEL: @scalable.load.nxv4f32(
; DISABLED-NEXT:    [[RES:%.*]] = tail call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr [[P:%.*]], i32 4, <vscale x 4 x i1> [[MASK:%.*]], <vscale x 4 x float> poison)
; DISABLED-NEXT:    ret <vscale x 4 x float> [[RES]]
;
  %res = tail call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0(ptr %p, i32 4, <vscale x 4 x i1> %mask, <vscale x 4 x float> poison)
  ret <vscale x 4 x float> %res
}

;; Scalable masked store: the element count is only known at run time, so the
;; instrumentation computes it as vscale * 4 and emits a loop over the element
;; index. Each iteration extracts that element's mask bit and runs the
;; __asan_store4 check on the element address only when the bit is set; the
;; masked.store follows once the loop exits.
define void @scalable.store.nxv4f32(ptr %p, <vscale x 4 x float> %arg, <vscale x 4 x i1> %mask) sanitize_address {
; CHECK-LABEL: @scalable.store.nxv4f32(
; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT:    [[TMP2:%.*]] = mul i64 [[TMP1]], 4
; CHECK-NEXT:    br label [[DOTSPLIT:%.*]]
; CHECK:       .split:
; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[TMP0:%.*]] ], [ [[IV_NEXT:%.*]], [[TMP7:%.*]] ]
; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <vscale x 4 x i1> [[MASK:%.*]], i64 [[IV]]
; CHECK-NEXT:    br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP7]]
; CHECK:       4:
; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr <vscale x 4 x float>, ptr [[P:%.*]], i64 0, i64 [[IV]]
; CHECK-NEXT:    [[TMP6:%.*]] = ptrtoint ptr [[TMP5]] to i64
; CHECK-NEXT:    call void @__asan_store4(i64 [[TMP6]])
; CHECK-NEXT:    br label [[TMP7]]
; CHECK:       7:
; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; CHECK-NEXT:    [[IV_CHECK:%.*]] = icmp eq i64 [[IV_NEXT]], [[TMP2]]
; CHECK-NEXT:    br i1 [[IV_CHECK]], label [[DOTSPLIT_SPLIT:%.*]], label [[DOTSPLIT]]
; CHECK:       .split.split:
; CHECK-NEXT:    tail call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> [[ARG:%.*]], ptr [[P]], i32 4, <vscale x 4 x i1> [[MASK]])
; CHECK-NEXT:    ret void
;
; DISABLED-LABEL: @scalable.store.nxv4f32(
; DISABLED-NEXT:    tail call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> [[ARG:%.*]], ptr [[P:%.*]], i32 4, <vscale x 4 x i1> [[MASK:%.*]])
; DISABLED-NEXT:    ret void
;
  tail call void @llvm.masked.store.nxv4f32.p0(<vscale x 4 x float> %arg, ptr %p, i32 4, <vscale x 4 x i1> %mask)
  ret void
}
